import os
from self_factory import Data2Index
from self_factory import Data_process

# Walk every file under ``news_data`` and emit one record per file: the first
# line is treated as the title and the remaining lines as the body. The body
# is passed to Data_process.writetxt for 'data/sentence.txt' and the title
# for 'data/label.txt', in the same order, so the two outputs stay
# line-aligned.
for root, _dirs, files in os.walk('news_data'):
    for file in files:
        file_path = os.path.join(root, file)
        print(file_path)

        # Keep the input open only for the read; processing and output
        # writes do not need the handle.
        with open(file_path, 'r', encoding='utf8') as f:
            lines = f.readlines()

        # An empty file has no title line; indexing lines[0] would raise
        # IndexError and abort the whole run, so skip it instead.
        if not lines:
            print('skip empty file: ' + file_path)
            continue

        title = lines[0]
        # Collapse the body to a single line: drop <p>/</p> markers, ASCII
        # spaces and newlines, then strip any remaining edge whitespace.
        body = ''.join(lines[1:]).replace('<p>', '').replace(' ', '')\
            .replace('\n', '').replace('</p>', '').strip()
        lineB_ = title.replace('\n', '') + '\n'
        lineA_ = body + '\n'

        # NOTE(review): str2list(..., is_cut=True) presumably tokenises the
        # text into a list of strings -- confirm against self_factory.
        lineA = ' '.join(Data_process.str2list(lineA_, is_cut=True))
        lineB = ' '.join(Data_process.str2list(lineB_, is_cut=True))

        # NOTE(review): writetxt is assumed to append to the target file;
        # verify, otherwise only the last record would survive.
        Data_process.writetxt('data/sentence.txt', lineA)
        Data_process.writetxt('data/label.txt', lineB)

